# Load the project data and recode the categorical clinical fields as numbers
# so they can be used directly in Cox models and correlation matrices.
# The strings "NA", "" and "." are all read as missing values.
project_2_numdata <-
  read_csv("./data/project_2_data.csv", na = c("NA", "", ".")) |>
  janitor::clean_names() |>
  mutate(
    # Event indicator: 1 when status is "Dead", 0 for any other non-missing
    # value, NA when status is missing.
    status = as.numeric(status == "Dead"),
    # Ordinal codes: a label's position in the listed order is its numeric
    # value; labels that are missing or not listed become NA.
    t_stage = as.numeric(match(t_stage, c("T1", "T2", "T3", "T4"))),
    n_stage = as.numeric(match(n_stage, c("N1", "N2", "N3"))),
    # The original text column x6th_stage is kept alongside this numeric copy.
    x6th_stage_num = as.numeric(
      match(x6th_stage, c("IIA", "IIB", "IIIA", "IIIB", "IIIC"))
    ),
    differentiate = as.numeric(
      match(
        differentiate,
        c(
          "Well differentiated",
          "Moderately differentiated",
          "Poorly differentiated",
          "Undifferentiated"
        )
      )
    ),
    grade = as.numeric(
      match(grade, c("1", "2", "3", "anaplastic; Grade IV"))
    ),
    # Binary indicators: 1 for the named level, 0 otherwise, NA if missing.
    a_stage_regional = as.numeric(a_stage == "Regional"),
    estrogen_status = as.numeric(estrogen_status == "Positive"),
    progesterone_status = as.numeric(progesterone_status == "Positive")
  ) |>
  # a_stage is superseded by a_stage_regional.
  select(-a_stage)
## Rows: 4024 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Race, Marital Status, T Stage, N Stage, 6th Stage, differentiate, ...
## dbl (5): Age, Tumor Size, Regional Node Examined, Reginol Node Positive, Su...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Survival response built from follow-up time and the event indicator.
# The loop below does not reference this object; it rebuilds the response
# inside each model formula.
surv_object <- Surv(
  time = project_2_numdata[["survival_months"]],
  event = project_2_numdata[["status"]]
)

# Every column except the two outcome columns is a candidate predictor.
variables <- setdiff(names(project_2_numdata), c("survival_months", "status"))

# Fit one single-predictor Cox model per candidate and print its summary.
for (var in variables) {
  formula <- as.formula(sprintf("Surv(survival_months, status) ~ %s", var))
  model <- coxph(formula, data = project_2_numdata)
  print(var)
  print(summary(model))
}
## [1] "age"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.015659 1.015783 0.004624 3.386 0.000708 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.016 0.9845 1.007 1.025
##
## Concordance= 0.545 (se = 0.013 )
## Likelihood ratio test= 11.62 on 1 df, p=7e-04
## Wald test = 11.47 on 1 df, p=7e-04
## Score (logrank) test = 11.5 on 1 df, p=7e-04
##
## [1] "race"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## raceOther -0.9978 0.3687 0.2098 -4.756 1.97e-06 ***
## raceWhite -0.5997 0.5490 0.1252 -4.791 1.66e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## raceOther 0.3687 2.712 0.2444 0.5562
## raceWhite 0.5490 1.822 0.4296 0.7016
##
## Concordance= 0.541 (se = 0.008 )
## Likelihood ratio test= 27.23 on 2 df, p=1e-06
## Wald test = 29.95 on 2 df, p=3e-07
## Score (logrank) test = 31 on 2 df, p=2e-07
##
## [1] "marital_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## marital_statusMarried -0.33465 0.71559 0.11792 -2.838 0.00454 **
## marital_statusSeparated 0.74830 2.11341 0.27896 2.682 0.00731 **
## marital_statusSingle -0.08987 0.91405 0.14397 -0.624 0.53249
## marital_statusWidowed 0.13768 1.14761 0.17754 0.776 0.43804
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## marital_statusMarried 0.7156 1.3975 0.5679 0.9017
## marital_statusSeparated 2.1134 0.4732 1.2233 3.6512
## marital_statusSingle 0.9141 1.0940 0.6893 1.2120
## marital_statusWidowed 1.1476 0.8714 0.8104 1.6252
##
## Concordance= 0.549 (se = 0.011 )
## Likelihood ratio test= 26.45 on 4 df, p=3e-05
## Wald test = 30.18 on 4 df, p=4e-06
## Score (logrank) test = 31.6 on 4 df, p=2e-06
##
## [1] "t_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## t_stage 0.46842 1.59747 0.04741 9.88 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## t_stage 1.597 0.626 1.456 1.753
##
## Concordance= 0.6 (se = 0.011 )
## Likelihood ratio test= 91.12 on 1 df, p=<2e-16
## Wald test = 97.61 on 1 df, p=<2e-16
## Score (logrank) test = 99.3 on 1 df, p=<2e-16
##
## [1] "n_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## n_stage 0.76586 2.15085 0.04779 16.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## n_stage 2.151 0.4649 1.959 2.362
##
## Concordance= 0.651 (se = 0.011 )
## Likelihood ratio test= 230.8 on 1 df, p=<2e-16
## Wald test = 256.9 on 1 df, p=<2e-16
## Score (logrank) test = 286.2 on 1 df, p=<2e-16
##
## [1] "x6th_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## x6th_stageIIB 0.5223 1.6858 0.1335 3.912 9.17e-05 ***
## x6th_stageIIIA 0.9398 2.5594 0.1259 7.462 8.50e-14 ***
## x6th_stageIIIB 1.4959 4.4634 0.2458 6.085 1.16e-09 ***
## x6th_stageIIIC 1.8480 6.3474 0.1263 14.627 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## x6th_stageIIB 1.686 0.5932 1.298 2.190
## x6th_stageIIIA 2.559 0.3907 2.000 3.276
## x6th_stageIIIB 4.463 0.2240 2.757 7.226
## x6th_stageIIIC 6.347 0.1575 4.955 8.131
##
## Concordance= 0.669 (se = 0.011 )
## Likelihood ratio test= 248.2 on 4 df, p=<2e-16
## Wald test = 262.1 on 4 df, p=<2e-16
## Score (logrank) test = 310.5 on 4 df, p=<2e-16
##
## [1] "differentiate"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## differentiate 0.65269 1.92070 0.06485 10.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## differentiate 1.921 0.5206 1.691 2.181
##
## Concordance= 0.609 (se = 0.011 )
## Likelihood ratio test= 103.2 on 1 df, p=<2e-16
## Wald test = 101.3 on 1 df, p=<2e-16
## Score (logrank) test = 102.5 on 1 df, p=<2e-16
##
## [1] "grade"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## grade 0.65269 1.92070 0.06485 10.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## grade 1.921 0.5206 1.691 2.181
##
## Concordance= 0.609 (se = 0.011 )
## Likelihood ratio test= 103.2 on 1 df, p=<2e-16
## Wald test = 101.3 on 1 df, p=<2e-16
## Score (logrank) test = 102.5 on 1 df, p=<2e-16
##
## [1] "tumor_size"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## tumor_size 0.013357 1.013446 0.001521 8.781 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## tumor_size 1.013 0.9867 1.01 1.016
##
## Concordance= 0.609 (se = 0.012 )
## Likelihood ratio test= 65.65 on 1 df, p=5e-16
## Wald test = 77.1 on 1 df, p=<2e-16
## Score (logrank) test = 78.53 on 1 df, p=<2e-16
##
## [1] "estrogen_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## estrogen_status -1.2994 0.2727 0.1060 -12.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## estrogen_status 0.2727 3.667 0.2215 0.3357
##
## Concordance= 0.572 (se = 0.008 )
## Likelihood ratio test= 112.5 on 1 df, p=<2e-16
## Wald test = 150.1 on 1 df, p=<2e-16
## Score (logrank) test = 172.5 on 1 df, p=<2e-16
##
## [1] "progesterone_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## progesterone_status -0.95638 0.38428 0.08565 -11.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## progesterone_status 0.3843 2.602 0.3249 0.4545
##
## Concordance= 0.602 (se = 0.01 )
## Likelihood ratio test= 109.2 on 1 df, p=<2e-16
## Wald test = 124.7 on 1 df, p=<2e-16
## Score (logrank) test = 134.5 on 1 df, p=<2e-16
##
## [1] "regional_node_examined"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## regional_node_examined 0.011017 1.011078 0.004842 2.275 0.0229 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## regional_node_examined 1.011 0.989 1.002 1.021
##
## Concordance= 0.524 (se = 0.012 )
## Likelihood ratio test= 5.05 on 1 df, p=0.02
## Wald test = 5.18 on 1 df, p=0.02
## Score (logrank) test = 5.17 on 1 df, p=0.02
##
## [1] "reginol_node_positive"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## reginol_node_positive 0.077766 1.080869 0.004682 16.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## reginol_node_positive 1.081 0.9252 1.071 1.091
##
## Concordance= 0.659 (se = 0.012 )
## Likelihood ratio test= 191.2 on 1 df, p=<2e-16
## Wald test = 275.9 on 1 df, p=<2e-16
## Score (logrank) test = 300.4 on 1 df, p=<2e-16
##
## [1] "x6th_stage_num"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## x6th_stage_num 0.45643 1.57842 0.02806 16.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## x6th_stage_num 1.578 0.6335 1.494 1.668
##
## Concordance= 0.669 (se = 0.011 )
## Likelihood ratio test= 247.8 on 1 df, p=<2e-16
## Wald test = 264.6 on 1 df, p=<2e-16
## Score (logrank) test = 286.1 on 1 df, p=<2e-16
##
## [1] "a_stage_regional"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 4024, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## a_stage_regional -1.1399 0.3199 0.1741 -6.547 5.86e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## a_stage_regional 0.3199 3.126 0.2274 0.4499
##
## Concordance= 0.522 (se = 0.005 )
## Likelihood ratio test= 31.23 on 1 df, p=2e-08
## Wald test = 42.87 on 1 df, p=6e-11
## Score (logrank) test = 47.71 on 1 df, p=5e-12
Based on the univariate Cox model results, the final model should include statistically significant and clinically relevant variables. These include age (p = 0.0007, HR = 1.016), race (p < 0.001, indicating disparities), marital status (significant for Married and Separated), t_stage (p < 2e-16, HR = 1.597), n_stage (p < 2e-16, HR = 2.151), differentiate (p < 2e-16, HR = 1.921), grade (p < 2e-16, HR = 1.921), x6th_stage (numeric coding; p < 2e-16, HR = 1.578), tumor size (p < 2e-16, HR = 1.013), estrogen status (p < 2e-16, HR = 0.273), progesterone status (p < 2e-16, HR = 0.384), regional node positive (p < 2e-16, HR = 1.081), and a_stage (regional) (p = 5.9e-11, HR = 0.32). Non-significant levels such as Single and Widowed marital status (p > 0.05) should be excluded. However, we still need to check for multicollinearity and other issues before making the final decision.
# Correlation matrix (cor() default method) across every numeric column of
# project_2_numdata, used as a first screen for multicollinearity. The outcome
# columns survival_months and status are numeric, so they are included too.
# vapply() always yields a logical mask, even for a zero-column input where
# sapply() would return an empty list that cannot be used as a column index.
cor_matrix <- cor(
  project_2_numdata[, vapply(project_2_numdata, is.numeric, logical(1))]
)
print(cor_matrix)
## age t_stage n_stage differentiate
## age 1.000000000 -0.06691424 0.002883209 -0.09929371
## t_stage -0.066914236 1.00000000 0.277014535 0.13147030
## n_stage 0.002883209 0.27701454 1.000000000 0.16250134
## differentiate -0.099293714 0.13147030 0.162501337 1.00000000
## grade -0.099293714 0.13147030 0.162501337 1.00000000
## tumor_size -0.077214971 0.80917552 0.277904717 0.11936737
## estrogen_status 0.059787319 -0.06095383 -0.101994729 -0.21125105
## progesterone_status -0.021268794 -0.05763326 -0.093722800 -0.17986002
## regional_node_examined -0.033345483 0.11410205 0.328276140 0.08441631
## reginol_node_positive 0.012585513 0.24307493 0.838073333 0.13531890
## survival_months -0.009389560 -0.08571763 -0.139576527 -0.06766924
## status 0.055921310 0.15469948 0.255771945 0.16139784
## x6th_stage_num -0.018741051 0.60671104 0.881880081 0.18694337
## a_stage_regional 0.020859878 -0.22112353 -0.260572948 -0.03945904
## grade tumor_size estrogen_status
## age -0.09929371 -0.07721497 0.05978732
## t_stage 0.13147030 0.80917552 -0.06095383
## n_stage 0.16250134 0.27790472 -0.10199473
## differentiate 1.00000000 0.11936737 -0.21125105
## grade 1.00000000 0.11936737 -0.21125105
## tumor_size 0.11936737 1.00000000 -0.05958454
## estrogen_status -0.21125105 -0.05958454 1.00000000
## progesterone_status -0.17986002 -0.06987929 0.51330798
## regional_node_examined 0.08441631 0.10435180 -0.04483576
## reginol_node_positive 0.13531890 0.24232172 -0.08598523
## survival_months -0.06766924 -0.08690124 0.12846902
## status 0.16139784 0.13420512 -0.18465036
## x6th_stage_num 0.18694337 0.51397121 -0.10561449
## a_stage_regional -0.03945904 -0.12388287 0.06557043
## progesterone_status regional_node_examined
## age -0.02126879 -0.03334548
## t_stage -0.05763326 0.11410205
## n_stage -0.09372280 0.32827614
## differentiate -0.17986002 0.08441631
## grade -0.17986002 0.08441631
## tumor_size -0.06987929 0.10435180
## estrogen_status 0.51330798 -0.04483576
## progesterone_status 1.00000000 -0.01805070
## regional_node_examined -0.01805070 1.00000000
## reginol_node_positive -0.07806852 0.41157970
## survival_months 0.09601832 -0.02205421
## status -0.17707930 0.03477200
## x6th_stage_num -0.10124555 0.31721872
## a_stage_regional 0.02652963 -0.06901029
## reginol_node_positive survival_months status
## age 0.01258551 -0.00938956 0.05592131
## t_stage 0.24307493 -0.08571763 0.15469948
## n_stage 0.83807333 -0.13957653 0.25577194
## differentiate 0.13531890 -0.06766924 0.16139784
## grade 0.13531890 -0.06766924 0.16139784
## tumor_size 0.24232172 -0.08690124 0.13420512
## estrogen_status -0.08598523 0.12846902 -0.18465036
## progesterone_status -0.07806852 0.09601832 -0.17707930
## regional_node_examined 0.41157970 -0.02205421 0.03477200
## reginol_node_positive 1.00000000 -0.13521385 0.25663809
## survival_months -0.13521385 1.00000000 -0.47651426
## status 0.25663809 -0.47651426 1.00000000
## x6th_stage_num 0.77396309 -0.14483728 0.25763588
## a_stage_regional -0.23284888 0.07010906 -0.09658422
## x6th_stage_num a_stage_regional
## age -0.01874105 0.02085988
## t_stage 0.60671104 -0.22112353
## n_stage 0.88188008 -0.26057295
## differentiate 0.18694337 -0.03945904
## grade 0.18694337 -0.03945904
## tumor_size 0.51397121 -0.12388287
## estrogen_status -0.10561449 0.06557043
## progesterone_status -0.10124555 0.02652963
## regional_node_examined 0.31721872 -0.06901029
## reginol_node_positive 0.77396309 -0.23284888
## survival_months -0.14483728 0.07010906
## status 0.25763588 -0.09658422
## x6th_stage_num 1.00000000 -0.29196180
## a_stage_regional -0.29196180 1.00000000
# Table for the correlation chart: everything in project_2_numdata except the
# three columns that still hold text labels.
cortable <- select(project_2_numdata, -c(race, marital_status, x6th_stage))

# Evaluated at top level, so the whole column is auto-printed.
# NOTE(review): this looks like a leftover inspection step; confirm it is
# still wanted in the report.
cortable[["regional_node_examined"]]
## [1] 24 14 14 2 3 18 11 9 20 21 9 11 13 23 16 20 1 22 16 20 15 15 4 18
## [25] 26 31 25 14 14 10 3 5 6 19 19 9 21 14 10 15 12 12 2 15 11 10 15 12
## [49] 16 8 17 9 7 20 16 13 19 49 15 17 24 12 1 20 16 5 23 20 5 12 12 24
## [73] 16 15 19 13 11 7 9 9 21 10 9 2 13 12 7 10 11 9 8 15 5 21 12 17
## [97] 4 10 3 14 33 25 3 7 14 2 11 12 8 30 15 3 34 14 6 12 9 16 15 15
## [121] 7 15 16 23 23 16 13 3 8 13 14 10 8 5 8 28 12 20 14 18 4 13 24 23
## [145] 20 15 18 13 1 13 18 14 15 14 14 16 15 6 2 14 18 3 2 20 16 3 12 24
## [169] 8 14 20 11 9 9 24 11 17 7 31 11 32 23 3 15 14 14 3 14 34 14 12 2
## [193] 19 5 16 5 16 10 22 11 14 16 1 17 2 6 24 18 12 6 11 17 18 9 10 4
## [217] 12 17 4 27 14 20 18 9 2 27 20 42 9 15 19 7 22 2 15 26 26 12 28 8
## [241] 29 30 9 18 15 18 9 18 15 11 6 5 2 13 19 20 13 7 20 12 11 9 9 3
## [265] 33 3 33 3 4 20 13 20 18 9 11 2 4 23 14 14 16 12 18 12 14 7 13 12
## [289] 30 41 39 17 11 19 14 10 1 11 21 13 18 23 30 46 25 10 22 10 4 19 4 15
## [313] 14 27 12 4 14 14 28 12 18 15 8 19 16 15 14 1 9 10 24 19 6 19 13 11
## [337] 4 13 19 13 12 19 17 9 23 12 5 16 29 17 24 16 6 7 1 18 23 5 3 20
## [361] 10 23 24 2 11 16 19 25 14 12 30 16 2 24 26 14 14 16 18 15 5 6 17 11
## [385] 22 3 16 10 14 7 20 14 21 18 40 20 14 6 18 8 31 13 14 7 13 20 12 9
## [409] 1 9 7 19 3 9 9 2 7 20 39 7 27 17 10 8 8 14 20 11 10 10 11 25
## [433] 1 9 15 19 9 2 25 5 21 17 10 29 20 3 26 6 11 14 2 2 17 9 14 19
## [457] 20 16 13 10 6 18 18 11 7 11 3 4 12 12 9 3 4 16 7 32 9 28 9 17
## [481] 18 6 25 16 1 23 9 18 17 18 51 15 7 10 18 23 23 16 19 20 24 15 5 4
## [505] 22 12 9 15 12 14 17 9 13 13 15 13 28 14 13 13 11 33 29 21 1 17 44 2
## [529] 16 3 38 14 18 16 17 24 18 7 21 23 23 13 3 9 33 5 15 16 22 20 47 15
## [553] 13 17 14 1 22 9 14 15 11 19 1 2 17 14 9 27 18 24 16 25 9 26 31 14
## [577] 16 14 14 5 13 14 19 11 29 19 19 11 10 12 29 4 15 19 13 10 4 13 15 18
## [601] 3 16 40 27 22 7 16 15 10 15 6 17 8 8 13 8 10 2 5 11 6 23 1 15
## [625] 24 8 17 4 15 13 26 2 18 54 28 2 6 1 2 2 13 5 14 27 28 11 18 17
## [649] 12 31 2 24 3 7 26 3 28 24 11 21 14 7 39 19 1 8 16 19 28 12 19 4
## [673] 19 12 23 17 17 13 10 13 14 22 11 9 18 32 12 13 8 19 16 12 32 20 14 8
## [697] 18 27 26 19 12 36 19 12 27 5 21 2 15 2 16 26 19 2 14 2 11 4 8 4
## [721] 14 20 20 14 27 29 24 5 7 22 17 8 22 16 23 10 12 16 26 15 14 11 9 1
## [745] 8 9 21 26 25 2 13 10 23 10 6 33 10 19 5 13 13 31 3 6 2 24 19 17
## [769] 2 13 11 19 17 12 7 18 15 20 16 10 2 6 10 30 9 11 18 3 2 15 38 2
## [793] 9 15 2 11 14 1 25 18 15 27 19 6 17 1 6 14 12 9 14 13 5 20 13 29
## [817] 21 2 1 9 2 1 20 19 26 24 14 6 13 17 5 1 14 26 3 23 5 7 36 15
## [841] 19 31 14 16 12 19 20 10 12 17 5 9 19 3 4 25 6 32 15 16 31 16 7 21
## [865] 18 15 20 10 31 13 3 3 17 6 21 17 16 20 10 16 25 15 6 17 6 18 25 10
## [889] 10 7 28 10 25 38 26 20 9 14 32 25 4 12 3 11 7 9 17 24 9 29 2 32
## [913] 13 18 4 18 7 19 3 13 19 19 27 18 8 26 10 5 1 23 10 11 16 12 15 13
## [937] 3 14 21 14 23 61 7 1 23 26 10 11 14 15 16 10 10 8 5 34 16 11 22 23
## [961] 2 15 11 32 21 12 14 19 10 14 9 17 9 20 5 10 32 2 9 18 5 19 9 13
## [985] 17 3 2 26 9 41 17 18 10 8 11 26 21 9 20 22 3 19 22 8 23 13 5 21
## [1009] 17 16 15 20 24 10 17 3 23 16 17 17 18 17 6 17 16 15 20 4 6 18 15 29
## [1033] 18 18 2 6 18 13 6 30 23 25 9 19 10 17 9 10 13 8 14 15 17 23 7 16
## [1057] 10 10 24 7 19 25 19 19 27 19 18 9 16 22 12 16 23 29 5 13 26 4 5 9
## [1081] 30 26 12 15 4 12 1 28 15 22 33 28 23 23 18 22 9 6 10 15 11 15 21 10
## [1105] 1 18 14 22 11 20 16 10 12 11 8 2 26 10 16 16 30 2 11 11 11 6 4 4
## [1129] 37 2 17 4 23 2 10 11 10 14 10 21 12 13 17 9 16 13 22 11 1 18 8 11
## [1153] 14 14 18 10 4 9 12 15 17 11 10 21 16 13 23 8 8 20 7 12 3 6 13 13
## [1177] 22 4 5 23 4 18 21 13 7 14 12 20 3 12 9 22 21 7 2 1 15 13 5 28
## [1201] 15 12 19 9 8 16 9 17 9 14 18 28 18 12 2 9 15 16 26 15 8 18 15 10
## [1225] 3 15 13 3 6 8 7 19 13 25 5 9 5 16 4 14 11 14 14 3 16 18 26 10
## [1249] 20 10 5 3 4 11 15 27 2 19 22 5 20 20 4 3 16 21 17 36 5 15 8 9
## [1273] 13 15 2 10 10 8 26 21 14 14 18 10 4 10 12 32 13 26 6 13 13 12 6 11
## [1297] 20 21 18 28 14 3 22 2 13 21 22 16 13 16 5 1 1 4 32 16 11 13 24 28
## [1321] 30 16 16 3 8 2 6 9 2 16 10 13 1 23 26 21 4 13 13 14 30 5 3 18
## [1345] 14 15 21 14 7 14 11 20 16 17 11 20 13 4 5 13 16 8 14 4 9 6 21 13
## [1369] 15 4 14 15 18 24 4 10 20 16 19 21 13 15 29 8 15 15 24 16 21 28 9 14
## [1393] 10 3 10 1 25 34 1 9 23 4 1 13 9 6 15 14 8 18 11 27 7 13 9 14
## [1417] 13 15 15 14 17 16 14 3 13 13 28 20 5 28 23 3 12 23 14 17 14 24 27 21
## [1441] 16 19 11 23 3 3 10 8 14 11 9 13 25 13 12 13 2 13 14 36 21 24 35 4
## [1465] 7 3 11 27 13 29 6 17 11 11 31 13 12 12 3 16 17 19 5 12 9 15 23 9
## [1489] 23 2 5 12 17 17 12 16 13 11 25 20 15 9 11 26 13 20 12 13 21 20 9 13
## [1513] 9 4 3 16 27 14 15 16 40 17 13 22 19 1 32 3 2 10 15 22 1 20 16 2
## [1537] 21 11 16 31 15 14 14 16 16 10 15 12 19 7 5 4 10 25 16 4 20 17 6 11
## [1561] 47 24 15 9 17 12 16 29 4 11 13 4 16 9 17 18 28 9 11 9 43 9 7 9
## [1585] 32 9 20 5 11 24 6 39 18 8 16 8 14 26 18 25 13 17 8 13 22 10 10 9
## [1609] 18 21 22 20 8 24 18 11 7 19 20 23 25 9 9 19 49 9 7 7 18 11 6 16
## [1633] 17 4 17 11 4 25 25 27 19 18 21 16 12 26 15 16 12 18 23 27 18 8 8 9
## [1657] 16 17 18 4 17 20 20 13 4 24 6 29 11 1 18 6 25 18 7 12 14 6 13 13
## [1681] 20 5 14 15 21 13 13 3 19 12 19 10 5 15 16 20 13 7 13 7 20 26 9 6
## [1705] 21 10 6 22 16 21 3 25 16 19 11 19 10 6 24 13 20 18 2 27 15 11 11 8
## [1729] 24 8 10 21 6 11 23 1 5 2 27 27 1 21 11 16 20 8 19 9 15 24 14 26
## [1753] 16 18 17 14 9 20 15 2 11 14 10 12 27 15 16 1 15 4 15 23 16 6 9 14
## [1777] 9 13 3 16 9 16 10 16 26 11 16 10 29 21 25 9 12 24 2 17 10 19 5 13
## [1801] 6 13 13 6 8 11 3 29 11 1 7 1 22 4 34 7 3 5 17 15 18 15 6 10
## [1825] 9 20 9 11 9 18 1 14 6 22 8 15 15 8 11 23 14 28 6 8 11 8 19 20
## [1849] 3 10 16 4 9 24 4 12 10 6 12 13 25 18 12 16 14 4 15 13 13 20 11 8
## [1873] 35 18 15 12 1 16 12 10 23 8 16 24 14 24 10 24 17 9 30 13 23 15 20 1
## [1897] 8 19 16 27 15 9 9 25 2 9 7 18 19 13 8 9 6 14 15 15 19 16 23 2
## [1921] 3 17 1 20 5 1 27 15 23 3 13 10 14 15 18 25 21 14 11 17 13 10 13 15
## [1945] 18 13 12 11 20 22 11 13 8 17 12 15 11 2 11 10 18 14 19 21 14 3 3 13
## [1969] 17 6 24 14 24 17 17 12 15 4 9 29 18 5 3 18 19 21 12 10 17 7 18 4
## [1993] 18 7 17 8 22 2 4 16 30 7 13 16 1 7 14 5 25 9 12 14 15 1 22 17
## [2017] 7 6 24 5 13 17 15 7 21 25 17 16 37 13 8 33 24 28 24 13 22 33 21 11
## [2041] 14 12 19 13 26 19 3 12 14 15 15 12 4 3 9 12 41 16 22 8 13 17 3 19
## [2065] 1 21 2 16 12 6 4 11 13 26 23 11 10 22 14 8 4 3 17 2 23 17 3 13
## [2089] 19 14 23 8 13 24 12 11 11 17 2 21 16 15 9 2 17 14 20 12 16 27 13 29
## [2113] 10 24 19 14 14 9 19 19 7 20 16 13 16 11 19 5 36 11 20 5 5 28 52 7
## [2137] 23 15 9 11 13 15 41 15 1 23 8 23 26 12 20 12 3 16 2 23 16 11 11 37
## [2161] 14 15 15 34 8 21 7 27 22 3 21 1 10 13 7 11 11 10 23 2 24 14 41 17
## [2185] 9 12 1 28 9 13 8 22 3 23 18 2 12 15 16 18 19 17 10 2 5 2 10 8
## [2209] 14 2 19 17 12 35 7 19 21 2 19 13 4 1 16 13 2 1 3 22 17 23 25 9
## [2233] 10 11 20 24 14 15 15 7 18 16 13 45 17 16 12 9 36 18 3 20 14 18 22 31
## [2257] 17 12 11 12 16 8 1 1 11 19 18 4 15 1 10 22 26 11 29 18 13 4 18 18
## [2281] 9 21 13 8 13 18 21 26 10 2 12 13 12 35 4 23 12 1 10 12 8 32 6 15
## [2305] 29 20 6 17 18 9 15 2 6 16 18 10 13 14 16 14 10 14 20 35 12 12 13 14
## [2329] 2 21 32 8 3 10 23 15 14 15 16 12 7 4 16 14 17 5 16 13 12 14 31 8
## [2353] 27 15 32 30 9 21 18 24 15 18 18 22 16 9 20 16 20 4 5 6 28 1 2 9
## [2377] 35 23 16 16 10 15 16 10 6 17 14 8 17 18 12 28 18 23 32 17 1 20 17 13
## [2401] 7 9 10 5 16 9 11 30 9 19 9 5 14 8 21 8 18 16 13 1 3 9 13 12
## [2425] 21 30 17 11 3 7 12 11 11 2 10 8 8 7 28 27 24 4 22 15 2 9 14 28
## [2449] 13 2 13 10 11 21 19 30 8 20 2 14 43 8 57 14 5 16 16 12 13 21 9 6
## [2473] 17 5 19 9 8 14 12 16 5 28 19 14 25 18 14 22 13 22 19 16 12 9 15 30
## [2497] 13 10 8 21 12 3 3 8 11 15 11 26 20 10 25 5 19 19 13 15 8 10 9 15
## [2521] 10 4 17 8 4 26 11 16 25 13 11 14 13 16 9 13 8 13 13 6 13 5 7 2
## [2545] 21 23 26 1 13 6 16 18 20 17 19 1 25 12 25 10 17 10 4 17 18 25 17 9
## [2569] 28 13 2 17 18 24 8 21 12 15 24 5 9 14 17 26 6 17 12 15 12 8 14 16
## [2593] 7 10 5 16 13 1 14 9 10 22 12 37 9 12 2 36 11 19 12 12 12 28 18 8
## [2617] 15 23 13 26 5 1 16 11 21 13 17 15 18 8 17 18 23 17 17 2 19 10 30 8
## [2641] 15 17 10 11 12 18 20 17 13 22 13 5 7 14 13 7 4 4 12 11 19 11 2 9
## [2665] 12 26 11 1 15 13 17 12 9 20 13 14 5 3 13 4 18 27 4 5 14 2 18 6
## [2689] 16 3 3 4 19 10 18 24 13 3 7 9 20 4 6 32 7 21 6 14 5 2 14 8
## [2713] 12 14 19 13 21 11 4 21 11 21 32 12 2 43 13 14 5 16 13 10 15 14 19 8
## [2737] 8 21 12 16 18 11 5 17 18 19 6 6 13 15 6 12 17 39 12 26 24 17 15 9
## [2761] 26 16 6 25 8 11 26 25 10 13 16 19 20 17 22 20 11 11 18 15 23 9 22 7
## [2785] 2 3 5 16 10 23 33 24 10 12 16 10 2 16 18 6 29 23 11 13 7 12 16 23
## [2809] 5 8 10 24 19 1 14 9 5 14 4 22 22 12 17 2 22 9 8 17 26 22 23 9
## [2833] 1 21 12 26 8 19 15 4 14 11 27 16 9 26 13 2 15 9 26 15 10 19 10 2
## [2857] 10 16 13 11 23 11 7 19 21 7 20 12 8 1 11 9 8 32 24 9 17 8 13 12
## [2881] 15 12 24 12 12 10 9 47 17 4 9 23 8 10 22 14 27 6 12 19 16 14 21 16
## [2905] 18 14 16 3 17 20 22 12 9 14 9 14 13 22 22 14 17 3 15 6 13 2 2 14
## [2929] 30 22 10 2 15 9 35 11 14 1 6 19 17 11 24 33 4 10 14 10 10 27 13 7
## [2953] 30 14 14 19 3 14 18 8 21 27 17 12 6 8 30 28 18 8 16 13 20 13 6 36
## [2977] 12 13 5 17 2 8 23 10 12 13 29 10 14 3 8 5 24 29 16 32 19 17 29 12
## [3001] 20 7 1 18 8 13 8 17 24 19 12 12 9 10 16 14 11 34 2 13 14 17 20 18
## [3025] 24 5 27 1 13 15 29 16 25 14 2 9 4 10 12 8 9 18 6 37 33 13 9 15
## [3049] 24 11 9 11 9 21 19 12 3 12 24 3 9 12 19 15 35 8 14 10 12 4 19 9
## [3073] 22 16 7 11 4 17 5 13 8 16 23 15 9 8 8 10 15 28 21 6 17 5 3 17
## [3097] 17 35 14 21 11 12 27 18 14 19 12 11 16 12 18 17 18 24 15 8 18 1 8 25
## [3121] 19 26 8 18 22 13 15 9 29 4 17 9 47 25 15 11 13 19 22 23 14 16 27 29
## [3145] 7 8 1 1 25 40 19 28 23 10 12 17 23 14 41 13 13 16 16 19 5 24 13 4
## [3169] 2 9 10 8 24 7 14 6 6 24 5 10 14 20 13 26 21 13 14 11 9 11 15 12
## [3193] 5 18 30 4 17 10 13 1 19 24 4 26 17 8 19 20 16 13 16 25 19 16 3 12
## [3217] 15 14 37 2 9 18 11 1 27 15 1 16 1 26 22 16 10 18 12 11 4 21 7 9
## [3241] 6 21 12 21 17 22 5 14 5 2 17 18 2 11 18 11 11 6 10 13 17 15 4 15
## [3265] 26 33 19 29 18 10 5 11 9 22 15 14 13 2 8 9 15 16 2 12 15 5 16 8
## [3289] 47 10 13 15 9 5 8 15 13 13 27 13 20 13 20 21 4 25 8 6 2 20 15 11
## [3313] 15 21 1 20 16 21 2 16 5 10 20 12 19 15 35 23 3 9 21 19 21 17 5 24
## [3337] 14 27 14 13 19 8 3 26 2 2 18 3 18 21 18 7 18 20 16 22 11 21 10 13
## [3361] 9 10 4 17 12 16 15 9 16 19 14 11 17 7 23 13 13 16 3 5 9 10 16 27
## [3385] 15 13 31 13 10 15 6 23 17 23 4 20 4 12 14 9 8 30 5 15 13 2 7 16
## [3409] 7 14 13 29 13 1 21 8 12 3 8 9 6 7 2 12 17 16 14 5 18 11 16 24
## [3433] 11 21 7 16 16 14 37 11 1 17 6 15 3 1 15 6 10 18 5 12 23 6 51 20
## [3457] 5 14 12 11 16 8 21 34 10 5 27 14 17 19 3 2 10 14 11 9 18 12 16 22
## [3481] 18 18 12 8 13 13 13 4 19 16 6 12 7 51 11 9 12 16 17 16 11 21 15 4
## [3505] 16 25 5 27 18 9 19 10 10 23 20 22 1 5 17 11 14 24 19 6 14 11 12 14
## [3529] 6 19 17 14 14 18 13 30 29 8 17 19 4 10 23 8 17 9 3 12 23 20 12 8
## [3553] 7 1 15 13 10 13 21 20 6 13 13 3 12 27 11 14 19 27 4 2 9 13 9 12
## [3577] 13 18 12 13 1 13 31 14 3 20 3 12 6 19 12 8 13 2 12 28 16 10 3 2
## [3601] 14 24 5 18 21 19 22 20 18 26 28 10 17 24 5 4 18 13 4 12 31 20 18 17
## [3625] 15 10 15 1 18 3 10 24 8 16 2 16 12 13 4 6 26 6 11 7 11 13 26 6
## [3649] 17 8 18 21 28 28 11 4 8 17 24 17 8 13 2 16 18 13 3 10 15 13 13 30
## [3673] 12 17 26 16 10 30 23 12 1 22 9 13 8 28 21 17 20 2 13 18 8 11 18 6
## [3697] 14 3 18 42 14 18 21 19 10 7 11 17 38 24 12 13 9 20 11 15 12 6 12 16
## [3721] 20 11 16 8 11 4 12 10 6 11 7 2 19 21 15 7 16 19 2 11 2 22 3 15
## [3745] 15 17 16 17 12 6 2 14 25 25 16 9 17 6 21 2 23 26 13 11 19 14 12 11
## [3769] 15 25 11 47 19 10 14 9 11 2 16 19 9 16 15 7 3 13 6 15 18 19 12 28
## [3793] 22 18 2 19 20 16 23 16 4 20 16 1 18 21 2 1 8 20 9 7 31 21 3 15
## [3817] 28 8 9 11 22 8 26 16 25 22 10 13 28 18 26 13 12 11 11 5 16 1 19 21
## [3841] 27 18 18 3 6 23 2 20 2 22 14 6 18 11 22 17 7 7 3 11 16 4 12 1
## [3865] 25 31 7 19 21 14 18 12 28 11 14 16 15 17 13 17 17 5 20 2 16 7 12 16
## [3889] 21 14 16 18 8 40 23 17 15 25 32 11 10 9 13 8 17 3 13 5 23 14 19 24
## [3913] 13 6 18 29 14 7 4 24 22 9 10 36 22 20 10 29 7 12 14 23 20 24 27 16
## [3937] 24 4 4 3 14 8 31 10 1 16 1 9 25 13 60 10 10 9 5 13 12 37 24 28
## [3961] 28 4 18 14 1 23 25 14 14 10 15 6 7 12 16 13 9 18 31 21 22 13 20 14
## [3985] 19 22 6 12 23 12 39 13 23 27 19 17 21 15 11 12 13 16 5 21 19 14 18 10
## [4009] 19 20 6 24 20 10 22 4 29 2 11 1 14 11 11 7
# Correlation chart for all columns of `cortable`, with histograms enabled
# and solid points (pch 19).
# NOTE(review): the repeated `par(usr)` warnings emitted by this call appear
# to come from inside chart.Correlation itself; confirm before acting on them.
chart.Correlation(cortable, pch = 19, histogram = TRUE)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
We have to choose one variable from each group of closely related variables:
differentiate / grade; t_stage / tumor_size; n_stage / reginol_node_positive / x6th_stage_num.
# Candidate predictor set for the multivariable model: every column of
# project_2_numdata except `differentiate` and `x6th_stage_num`.
muldata <- select(project_2_numdata, -c(differentiate, x6th_stage_num))

# Cox model on all remaining columns; `.` expands to every column of `muldata`
# that does not appear in the Surv() response.
cox_model <- coxph(
  Surv(survival_months, status) ~ .,
  data = muldata
)

# Variance inflation factors for the fitted model (the output below shows
# vif.default dispatch with GVIF columns, presumably car::vif).
vifs <- vif(cox_model)
## Warning in vif.default(cox_model): No intercept: vifs may not be sensible.
print(vifs)
## GVIF Df GVIF^(1/(2*Df))
## age 1.108043 1 1.052636
## race 1.083424 2 1.020234
## marital_status 1.227326 4 1.025935
## t_stage 4.289067 1 2.071006
## n_stage 16.221966 1 4.027650
## x6th_stage 38.451047 4 1.578024
## grade 1.138415 1 1.066965
## tumor_size 2.829424 1 1.682089
## estrogen_status 1.623845 1 1.274302
## progesterone_status 1.546881 1 1.243737
## regional_node_examined 1.726548 1 1.313982
## reginol_node_positive 4.397039 1 2.096912
## a_stage_regional 1.230259 1 1.109171
The VIF analysis reveals substantial multicollinearity for n_stage (GVIF = 16.22, GVIF^(1/(2Df)) = 4.03), x6th_stage (GVIF = 38.45 on 4 Df, GVIF^(1/(2Df)) = 1.58), reginol_node_positive (GVIF = 4.40, GVIF^(1/(2Df)) = 2.10), and t_stage (GVIF = 4.29, GVIF^(1/(2Df)) = 2.07), indicating redundancy among the staging variables. Tumor_size shows moderate multicollinearity (GVIF = 2.83, GVIF^(1/(2Df)) = 1.68), while the other variables have acceptable values near 1. Variables with high multicollinearity should be considered for exclusion, so I decided to remove t_stage, n_stage, and reginol_node_positive.
# Reduced predictor set: remove the three staging/node variables from `muldata`.
final <- select(muldata, -c(t_stage, n_stage, reginol_node_positive))

# Refit the Cox model on every remaining column of `final`
# (this overwrites the earlier `cox_model`).
cox_model <- coxph(
  Surv(survival_months, status) ~ .,
  data = final
)

# Recompute the variance inflation factors for the refitted model.
vifs <- vif(cox_model)
## Warning in vif.default(cox_model): No intercept: vifs may not be sensible.
print(vifs)
## GVIF Df GVIF^(1/(2*Df))
## age 1.108302 1 1.052759
## race 1.060132 2 1.014705
## marital_status 1.149711 4 1.017592
## x6th_stage 1.930448 4 1.085694
## grade 1.133430 1 1.064627
## tumor_size 1.328934 1 1.152794
## estrogen_status 1.585994 1 1.259363
## progesterone_status 1.528070 1 1.236151
## regional_node_examined 1.222324 1 1.105587
## a_stage_regional 1.197044 1 1.094095
library(survival)

# Test the proportional-hazards assumption for the current `cox_model`
# (per-term tests plus a GLOBAL test, as printed below).
ph_test <- cox_model |>
  cox.zph()
print(ph_test)
## chisq df p
## age 0.144 1 0.704
## race 1.296 2 0.523
## marital_status 2.191 4 0.701
## x6th_stage 4.280 4 0.369
## grade 2.425 1 0.119
## tumor_size 1.216 1 0.270
## estrogen_status 30.810 1 2.8e-08
## progesterone_status 32.831 1 1.0e-08
## regional_node_examined 0.188 1 0.665
## a_stage_regional 4.845 1 0.028
## GLOBAL 54.550 17 8.1e-06
# Draw the diagnostic plots for the cox.zph result of `cox_model`
# (one panel per model term).
plot(ph_test)
The Cox model assumes that hazard ratios are constant over time. A non-significant p-value (p > 0.05) indicates that the PH assumption holds. The global test (chisq = 54.550, df = 17, p = 8.1e-06) is significant, so the model as a whole does not meet the assumption; the same is true for estrogen_status (p = 2.8e-08), progesterone_status (p = 1.0e-08), and a_stage_regional (p = 0.028). We need to further improve our model.
# Final model: the same data as before, but without estrogen_status,
# progesterone_status and a_stage_regional among the predictors.
finalmodel <- coxph(
  Surv(survival_months, status) ~
    age + race + marital_status + grade + tumor_size +
    regional_node_examined + x6th_stage,
  data = final
)

# Re-run the proportional-hazards test on the final model
# (this overwrites the earlier `ph_test`).
ph_test <- finalmodel |>
  cox.zph()
print(ph_test)
## chisq df p
## age 0.0977 1 0.755
## race 1.4006 2 0.496
## marital_status 2.6540 4 0.617
## grade 3.0281 1 0.082
## tumor_size 1.6594 1 0.198
## regional_node_examined 0.2829 1 0.595
## x6th_stage 4.3089 4 0.366
## GLOBAL 12.6117 14 0.557
# Diagnostic plots for the cox.zph result of `finalmodel`.
plot(ph_test)

# Concordance statistics stored on the larger model (`cox_model`), shown for
# comparison with the final model.
cox_model[["concordance"]]
## concordant discordant tied.x tied.y tied.xy concordance
## 1.435780e+06 5.080060e+05 1.000000e+00 2.219000e+03 0.000000e+00 7.386511e-01
## std
## 1.058639e-02
# Concordance statistics stored on the final model.
finalmodel[["concordance"]]
## concordant discordant tied.x tied.y tied.xy concordance
## 1.391238e+06 5.525470e+05 2.000000e+00 2.219000e+03 0.000000e+00 7.157363e-01
## std
## 1.074139e-02
Interpreting the PH diagnostic plots: No clear trend: if the solid line remains roughly flat (horizontal), the PH assumption is satisfied for that variable. Upward/downward trend: a visible trend or deviation indicates that the proportional hazards assumption may be violated for the corresponding variable, suggesting a time-dependent effect.
A C-index of 0.716 suggests that the final model has good discriminatory power: for about 71.6% of comparable pairs of patients it correctly ranks which one survives longer. The standard error is 0.0107, indicating little variability in the concordance estimate and suggesting robust performance. The reduction in the C-index (from 73.9% to 71.6%) relative to the larger model indicates a trade-off between model complexity and performance.
# Deviance residuals of the final model. These were previously taken from
# `cox_model`, the larger model that failed the proportional-hazards check,
# so the plot did not describe the model being reported.
dev_residuals <- residuals(finalmodel, type = "deviance")
plot(
  dev_residuals,
  main = "Deviance Residuals",
  ylab = "Residuals",
  xlab = "Index"
)
# Dashed reference lines at +/-2 to make large residuals easy to spot.
abline(h = c(-2, 2), col = "red", lty = 2)

# Overall survival curve from an intercept-only fit on `final`.
# NOTE(review): this curve does not use `finalmodel` at all, although the
# title says "for the Final Model" -- confirm whether survfit(finalmodel)
# (or a different title) was intended.
surv_fit <- survfit(Surv(survival_months, status) ~ 1, data = final)
plot(
  surv_fit,
  xlab = "Time (months)",
  ylab = "Survival Probability",
  main = "Survival Curve for the Final Model",
  col = "blue",
  lwd = 2
)
grid()